Link National Oil Company Database
library(readr)
library(tidyverse)
library(ggplot2)
library(plotly)
library(lubridate)
options (scipen = 999)
#cargo la base
nocs_full <- read_csv( "../data/nocs/NRGI-NOCdatabase-FullDataset.csv") %>%
mutate(observation = as.double(observation),
anio = (as.Date(parse_date_time(year, orders = "%Y"))))
hidrocarburos_df <- read_csv("../data/balances/hidrocarburos_df.csv") %>%
select(-"...1")
Variables que contiene la base
variables = unique(nocs_full$indicatorName)
variables
[1] "Any shares publicly traded?"
[2] "Auditor opinion issued with qualification?"
[3] "Bonus payments"
[4] "Capex (company-wide) per barrel"
[5] "Capex / total revenue"
[6] "Capital expenditures"
[7] "Cash and cash equivalents"
[8] "Cash flows from financing activities"
[9] "Cash flows from investing activities"
[10] "Cash flows from operating activities"
[11] "Cash ratio"
[12] "Country released EITI report for this year?"
[13] "Current assets"
[14] "Current liabilities"
[15] "Dividends"
[16] "Domestic oil, gas & product sales"
[17] "Domestic sales"
[18] "Employees"
[19] "Equity"
[20] "Exchange rate"
[21] "Exploration - Seismic (2D)"
[22] "Exploration - Seismic (3D and 4D)"
[23] "External oil, gas & product sales"
[24] "External sales"
[25] "Fiscal payments from contractors"
[26] "Gas production"
[27] "Gas production of home country"
[28] "GDP"
[29] "General government revenue"
[30] "General government total expenditure"
[31] "Government gross debt"
[32] "Government resource revenue / general government revenue"
[33] "Government transfers"
[34] "Income tax"
[35] "Longterm/fixed assets"
[36] "National gas reserves"
[37] "National oil and gas reserves"
[38] "National oil reserves"
[39] "Natural capital, subsoil assets: oil & gas"
[40] "Net income after taxes"
[41] "Net income after taxes/equity"
[42] "Net income after taxes/revenue"
[43] "Net income after taxes / total assets"
[44] "Net income from all revenues (before transfers to govt.)"
[45] "Net income from core revenues (before transfers to govt.)"
[46] "Net income per employee"
[47] "Net income / total revenue (before transfers to govt.)"
[48] "NOC debt / government gross debt"
[49] "NOC has stake in fields abroad?"
[50] "NOC net income / general government revenue"
[51] "NOC oil and gas production / oil and gas production of home country"
[52] "NOC operator of any production?"
[53] "NOC operator of fields abroad?"
[54] "NOC reserves / national oil and gas reserves"
[55] "NOC total assets / total national wealth"
[56] "NOC total revenues / GDP"
[57] "NOC total revenues / general government revenues"
[58] "NOC transfers to government / general government revenue"
[59] "NOC transfers to government / NOC net income"
[60] "NOC transfers to government / total NOC revenues"
[61] "NOC transfers to government / total public expenditures"
[62] "O&G production / total reserves"
[63] "O&G revenue per barrel of production"
[64] "O&G revenue per barrel of reserves"
[65] "O&G revenue per employee"
[66] "Oil and gas production of home country"
[67] "Oil & gas production"
[68] "Oil, gas & product sales"
[69] "Oil production"
[70] "Oil production of home country"
[71] "Operational expenditures"
[72] "Opex (company-wide) per barrel"
[73] "Opex / total revenue"
[74] "Other transfers to government"
[75] "Percentage non-core activities in total revenue"
[76] "Proceeds of state profit/equity petroleum"
[77] "Production on which NOC is operator"
[78] "Production per employee"
[79] "Report audited by independent external auditor?"
[80] "Report presented according to International Financial Reporting Standards (IFRS)?"
[81] "Reserves"
[82] "Reserves per employee"
[83] "Reserves / production ratio"
[84] "Return on capital employed (before transfers to govt.)"
[85] "Revenue - Non core activities"
[86] "Rigs"
[87] "Royalties"
[88] "Total assets"
[89] "Total liabilities"
[90] "Total longterm/fixed liabilities"
[91] "Total national wealth"
[92] "Total revenue"
[93] "Total revenue per barrel of production"
[94] "Total revenue per employee"
[95] "Total transfers to government"
[96] "Transfers to government per barrel"
[97] "Wells drilled"
Lista de todas las empresas
#empresas disponibles
unique(nocs_full$company)
Cantidad de empresas
length(unique(nocs_full$company))
#filtro exploratorio de empresas
filtro_empresas <- c("YPF", "Petrobras", "YPFB", "PDVSA", "Saudi Aramco")
#filtro con empresas de América Latina
empresas_AL <- nocs_full %>%
filter(region == "Latin America/Caribbean") %>%
select(company, country)
empresas_AL <- empresas_AL[!duplicated(empresas_AL), ]
empresas_AL
NA
ggplotly(production_graf)
The shape palette can deal with a maximum of 6 discrete values because more than 6 becomes difficult to discriminate; you have
61. Consider specifying shapes manually if you must have them.
assets_graf <- nocs_full %>%
filter(indicatorName %in% c( "Total assets"),
year == 2017, units == "USD million",
!is.na(observation)) %>%
ggplot(aes(x = reorder(company,-observation) , y= observation, fill = company))+
geom_col(position = "stack")+
labs(title = "Total assets",
x = "Empresa", y = "MM USD")+
theme(legend.position = "none",
axis.text.x = element_text(angle = 90, hjust = 1))
# facet_grid(indicatorName~region, labeller = label_wrap_gen(width=17))
ggplotly(assets_graf)
NA
assets_graf <- nocs_full %>%
filter(indicatorName %in% c( "Equity"),
year == 2017, units == "USD million",
!is.na(observation)) %>%
ggplot(aes(x = reorder(company,-observation) , y= observation, fill = company))+
geom_col(position = "stack")+
labs(title = "Total Equity", subtitle= "Year 2017",
x = "Empresa", y = "MM USD")+
theme(legend.position = "none",
axis.text.x = element_text(angle = 90, hjust = 1))
# facet_grid(indicatorName~region, labeller = label_wrap_gen(width=17))
ggplotly(assets_graf)
capex_graf <- nocs_full %>%
filter(indicatorName %in% c("Capital expenditures"), units == "USD million") %>%
ggplot(aes(x = year, y= observation, color = company))+
geom_line(aes(linetype = country))+
labs(title = "Capital expenditures (CAPEX)",
x = "Año", y = "MM USD")+
theme(legend.position = "none",
axis.text.x = element_text(angle = 90, hjust = 1),
strip.text.y = element_text(size = 7))+
facet_grid(~region, labeller = label_wrap_gen(width=15))
ggplotly(capex_graf)
#para cĂ¡lculo de rentabilidad
nocs_df <- nocs_full %>%
select(year, company, region, country, productionGroup, indicatorName, units, observation) %>%
filter(indicatorName %in% c("Longterm/fixed assets", "Cash and cash equivalents",
"Net income after taxes", "Income tax",
"Equity","Total assets",
"Total liabilities" , "Current liabilities" ),
units == "USD million") %>%
mutate(row_id = 1:nrow(.)
# , observation = as.double(observation)
) %>%
spread(.,
key = indicatorName,
value = observation) %>%
select(-c(row_id)) %>%
rename( fixed_assets = "Longterm/fixed assets", cash_and_equi = "Cash and cash equivalents",
net_income_after_tax = "Net income after taxes" , income_tax = "Income tax",
equity = "Equity", assets = "Total assets",
liabilities = "Total liabilities", current_liabilities = "Current liabilities") %>%
mutate(net_income_after_tax = as.double(net_income_after_tax),
fixed_assets = as.double(fixed_assets),
cash_and_equi = as.double(cash_and_equi),
income_tax = as.double(income_tax),
equity = as.double(equity),
liabilities = as.double(liabilities),
year = parse_date_time(year, orders = "y")) %>%
group_by(year, company, country, region, productionGroup, units) %>%
summarise(fixed_assets = sum(fixed_assets, na.rm = T),
cash_and_equi = sum(cash_and_equi, na.rm = T),
net_income_after_tax = sum(net_income_after_tax, na.rm = T),
income_tax = sum(income_tax, na.rm = T),
assets = sum(assets, na.rm = T),
liabilities = sum(liabilities, na.rm = T),
current_liabilities = sum(current_liabilities, na.rm = T),
equity = sum(equity, na.rm = T),
KTA = cash_and_equi + fixed_assets,
tg_beforetax = (income_tax + net_income_after_tax) / KTA,
tg_aftertax = net_income_after_tax / KTA,
ratio_endeudamiento = liabilities / equity,
ratio_endeudamiento_cp = current_liabilities/equity,
pasivo_activo = liabilities/assets,
ratio_solvencia = equity /liabilities)
# nocs_df %>% arrange(-tg_aftertax) %>%
# select(company, KTA, tg_aftertax)
nocs_df
nocs_df[sapply(nocs_df, is.infinite)] <- NA
tg_media <- nocs_df %>%
group_by(year) %>%
summarise(
tg_media_before_tax = sum(income_tax, net_income_after_tax, na.rm = T)/
sum(fixed_assets, cash_and_equi, na.rm = T),
tg_media_after_tax = sum(net_income_after_tax, na.rm = T) /
sum(fixed_assets, cash_and_equi, na.rm = T)) %>%
# tg_media_before_tax_mean = sum(mean(income_tax, na.rm = T),
# mean(net_income_after_tax, na.rm = T), na.rm = T)/
# sum(mean(fixed_assets, na.rm = T),
# mean(cash_and_equi, na.rm = T), na.rm = T),
# tg_media_after_tax_mean = mean(net_income_after_tax, na.rm = T)/
# sum(mean(fixed_assets, na.rm = T),
# mean(cash_and_equi, na.rm = T), na.rm = T)) %>%
gather(.,
key = variable,
value = valor,
2:3)
# mutate(metodo = case_when( str_sub(variable[-3])))
tg_media_graf <- tg_media %>%
# filter(variable == "tg_media_before_tax_mean") %>%
ggplot(aes(year, valor, color = variable))+
geom_line()+
labs(title = "NOC's. Tasa de Ganancia media")+
theme(legend.position = "bottom")+
scale_y_continuous(labels = scales::percent)
tg_media_graf
NA
nocs_df_con_media <- nocs_df %>%
bind_rows(tg_media %>%
spread(key = variable,
value = valor) %>%
mutate(company = "TG Media") %>%
rename(tg_beforetax = tg_media_before_tax,
tg_aftertax = tg_media_after_tax)) %>%
mutate(is_media = case_when(company == "TG Media" ~ "media",
TRUE ~ "empresas"))
write.csv(nocs_df_con_media, file = "nocs_df_con_media.csv")
graf_media_line <- nocs_df_con_media %>%
ggplot(aes(year, tg_aftertax, color = company, group = is_media))+
geom_line()+
geom_point()+
labs(title = "NOC's. Rentabilidad media y del resto de las empresas después de impuestos")+
theme(legend.position = "none")+
scale_y_continuous(labels = scales::percent, limits = c(-0.30, 0.70),breaks = seq(-0.30, 0.70, .1))
graf_media_line
graf_media_box <- nocs_df %>%
# ggplot(aes(year, tg_aftertax, color = company, size = is_media))+
ggplot(aes(year, tg_aftertax, color = company, group = year))+
geom_boxplot()+
# ggplot(aes(data = nocs_df_con_media %>% filter(company == "TG Media"), year, tg_aftertax))+
# geom_line()+
labs(title = "NOC's. Rentabilidad despues de impuestos")+
theme(legend.position = "none")+
scale_y_continuous(labels = scales::percent, limits = c(-0.3, .8), breaks = seq(-0.3, .8, 0.1))
graf_media_box
summary(nocs_df %>%
group_by(year, productionGroup) %>%
summarise(tg_aftertax = mean(tg_aftertax, na.rm = T)) %>%
spread(.,
key = productionGroup,
value = tg_aftertax))
`summarise()` has grouped output by 'year'. You can override using the `.groups` argument.
year Internationalized operators Large domestic producers Medium domestic producers Pre-production NOCs
Min. :2011-01-01 00:00:00 Min. :0.0921 Min. :0.1214 Min. :0.03228 Min. :-0.19649
1st Qu.:2012-10-01 12:00:00 1st Qu.:0.1124 1st Qu.:0.1677 1st Qu.:0.13562 1st Qu.:-0.03020
Median :2014-07-02 12:00:00 Median :0.1327 Median :0.2683 Median :1.35633 Median : 0.05986
Mean :2014-07-02 12:00:00 Mean :0.1424 Mean :0.2681 Mean :1.46370 Mean : 0.20272
3rd Qu.:2016-04-01 12:00:00 3rd Qu.:0.1706 3rd Qu.:0.3280 3rd Qu.:2.72462 3rd Qu.: 0.13376
Max. :2018-01-01 00:00:00 Max. :0.2127 Max. :0.4527 Max. :3.17724 Max. : 1.48343
Small domestic producers
Min. :-7.1290
1st Qu.: 0.0129
Median : 0.2032
Mean :-0.6546
3rd Qu.: 0.3530
Max. : 0.7303
tg_graf_prod_group <- nocs_df %>%
# filter(tg_aftertax != 0 & fixed_assets > 0 & tg_aftertax < 5 & tg_aftertax > -20 ) %>%
ggplot(aes(year, tg_aftertax, color = company))+
geom_line()+
labs(title = "NOC's. Rentabilidad despues de impuestos. DivisiĂ³n por tamaño")+
theme(legend.position = "none",
axis.text.x = element_text(angle = 90, hjust = 1))+
scale_y_continuous(labels = scales::percent, limits = c(-0.3, .8), breaks = seq(-0.3, .8, 0.20))+
facet_grid(~productionGroup, labeller = label_wrap_gen(width=17))
plotly::ggplotly(tg_graf_prod_group)
tg_graf_xregion <- nocs_df %>%
filter(tg_aftertax != 0 & fixed_assets > 0 & tg_aftertax < 1 & tg_aftertax > -20 ) %>%
ggplot(aes(year, tg_aftertax, color = company))+
geom_line()+
labs(title = "NOC's. Rentabilidad despues de impuestos. DivisiĂ³n por regiĂ³ns")+
theme(legend.position = "none",
axis.text.x = element_text(angle = 90, hjust = 1))+
scale_y_continuous(labels = scales::percent)+
facet_grid(~region, labeller = label_wrap_gen(width=17))
plotly::ggplotly(tg_graf_xregion)
NA
ggplotly(tg_graf_ypf)
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
Removed 8 rows containing non-finite values (stat_smooth).
# ruta_graf_tg <- "tg_filtro_graf.png"
# png(ruta_graf_tg)
# print(tg_filtro_graf)
# dev.off()
#CorrelaciĂ³n entre KTA y ganancias despuĂ©s de impuestos
cor(x = nocs_df$KTA,
y = nocs_df$net_income_after_tax, use = "complete.obs",
method = "spearman")
[1] 0.7450791
cor(x = nocs_spread_2$capex,
y = nocs_spread_2$net_income, use = "complete.obs",
method = "spearman")
[1] 0.6563327
\[ Solvencia = \frac{Patrimonio Neto}{Pasivo}\]
nocs_df %>%
ggplot(aes(year, ratio_solvencia ,group = year))+
geom_boxplot() +
# geom_point(aes(color = company))+
# geom_line(aes(group = company))+
geom_line(data = nocs_df %>% filter(company == "YPF"),
aes(year, ratio_solvencia,color = "red", group = company), size = 1)+
theme(legend.position = "none")+
labs(title = "DistribuciĂ³n de ratio de solvencia para todas las empresas e YPF")
ggplot(nocs_df %>% filter(company == "YPF"))+
geom_line( aes(year, ratio_solvencia, group = company))+
labs(title = "Ratio de solvencia de YPF")
solvencia_graf <- nocs_df %>%
filter(ratio_solvencia >=0 & ratio_solvencia < 4) %>%
ggplot(aes(year, ratio_solvencia, color = company))+
geom_point(alpha = 0.5)+
labs(title = "Ratio de solvencia todas las empresas")+
theme(legend.position = "none")+
# scale_y_continuous(labels = scales::percent)+
facet_wrap(~productionGroup)
ggplotly(solvencia_graf)
solvencia_graf_2 <- nocs_df %>%
# filter(ratio_solvencia >=0 & ratio_solvencia < 4) %>%
filter(company %in% empresas_AL$company) %>%
ggplot(aes(year, ratio_solvencia, color = company))+
geom_point(alpha = 0.5)+
labs(title = "Ratio de solvencia de empresas de AL")+
theme(legend.position = "none")+
# scale_y_continuous(labels = scales::percent)+
facet_wrap(~productionGroup)
ggplotly(solvencia_graf_2)
solvencia_filtro_graf <- filtro_emp_df %>%
# filter(ratio_solvencia >=0 & ratio_solvencia < 4) %>%
ggplot(aes(year, ratio_solvencia, color = company))+
geom_line()+
geom_point(alpha = 1, size = 3)+
labs(title = "Ratio de solvencia empresas seleccionadas")+
theme(legend.position="bottom")
# scale_y_continuous(labels = scales::percent)+
solvencia_filtro_graf
\[ Endeudamiento = \frac{Pasivo}{Patrimonio Neto} \]
nocs_df %>%
ggplot(aes(year, ratio_endeudamiento ,group = year))+
geom_boxplot() +
# geom_point(aes(color = company))+
# geom_line(aes(group = company))+
geom_line(data = nocs_df %>% filter(company == "YPF"),
aes(year, ratio_endeudamiento,color = "red", group = company), size = 1)+
theme(legend.position = "none")+
labs(title = "DistribuciĂ³n de ratio de endeudamiento de NOC's e YPF")+
ylim(NA, 15 )
ggplot(nocs_df %>% filter(company == "YPF"))+
geom_line( aes(year, ratio_endeudamiento, group = company))+
labs(title = "Ratio de endeudamiento de YPF")
nocs_df %>%
ggplot(aes( ratio_endeudamiento ))+#,group = year))+
geom_density() +
labs(title = "DistribuciĂ³n de ratio de endeudamiento de NOC's")+
facet_wrap(~year(year), scales = "free")
# tapply(nocs_df$ratio_endeudamiento, nocs_df$year, summary)
library(data.table)
nocs_dt = as.data.table(nocs_df)
nocs_dt[, as.list(summary(ratio_endeudamiento)), by =year]
nocs_df %>%
ggplot(aes(year, pasivo_activo ,group = year))+
geom_boxplot() +
# geom_point(aes(color = company))+
# geom_line(aes(group = company))+
geom_line(data = nocs_df %>% filter(company == "YPF"),
aes(year, pasivo_activo,color = "red", group = company), size = 1)+
theme(legend.position = "none")+
labs(title = "DistribuciĂ³n de Pasivo/Activo de NOC's e YPF")
# ylim(NA, 15 )
ggplot(nocs_df %>% filter(company == "YPF"))+
geom_line( aes(year, pasivo_activo, group = company))+
labs(title = "Pasivo/Activo de YPF")
nocs_df %>%
ggplot(aes(year, ratio_endeudamiento_cp ,group = year))+
geom_boxplot() +
# geom_point(aes(color = company))+
# geom_line(aes(group = company))+
geom_line(data = nocs_df %>% filter(company == "YPF"),
aes(year, ratio_endeudamiento_cp,color = "red", group = company), size = 1)+
theme(legend.position = "none")+
labs(title = "DistribuciĂ³n de ratio de endeudamiento CP de NOC's e YPF")+
ylim(NA, 12.5 )
ggplot(nocs_df %>% filter(company == "YPF"))+
geom_line( aes(year, ratio_endeudamiento_cp, group = company))+
labs(title = "Ratio de endeudamiento CP de YPF")
NA
NA
endeudamiento_graf <- nocs_df %>%
filter(ratio_endeudamiento <10 & ratio_endeudamiento > -3 ) %>%
ggplot(aes(year, ratio_endeudamiento, color = company))+
geom_point(alpha = 0.5)+
labs(title = "Ratio de endeudamiento todas las empresas")+
theme(legend.position = "none")+
# scale_y_continuous(labels = scales::percent)+
facet_wrap(~productionGroup)
ggplotly(endeudamiento_graf)
endeudamiento_graf_al <- nocs_df %>%
filter(ratio_endeudamiento <30 ) %>%
filter(company %in% empresas_AL$company ) %>%
ggplot(aes(year, ratio_endeudamiento, color = company))+
geom_point(alpha = 0.5)+
labs(title = "Ratio de endeudamiento empresas AL")+
theme(legend.position = "none")+
# scale_y_continuous(labels = scales::percent)+
facet_wrap(~productionGroup)
ggplotly(endeudamiento_graf_al)
endeudamiento_filtro_graf <- filtro_emp_df %>%
# filter(ratio_solvencia >=0 & ratio_solvencia < 4) %>%
ggplot(aes(year, ratio_endeudamiento, color = company))+
geom_point(alpha = 1, size = 3)+
geom_line()+
labs(title = "Ratio de endeudamiento empresas seleccionadas")+
theme(legend.position="bottom")
# scale_y_continuous(labels = scales::percent)+
endeudamiento_filtro_graf
NA
NA
endeudamiento_all <- nocs_df %>%
# filter(company %in% empresas_AL$company ) %>%
ggplot(aes(year, liabilities, color = company))+
geom_point(alpha = 0.5)+
geom_line()+
labs(title = "Liabilities", y = "USD million")+
theme(legend.position="none")+
facet_wrap(~productionGroup)
# scale_y_continuous(labels = scales::percent)+
ggplotly(endeudamiento_all)
endeudamiento_al <- nocs_df %>%
filter(company %in% empresas_AL$company ) %>%
ggplot(aes(year, liabilities, color = company))+
geom_point(alpha = 0.5)+
geom_line()+
labs(title = "Liabilities", y = "USD million")+
theme(legend.position="none")+
facet_wrap(~productionGroup)
# scale_y_continuous(labels = scales::percent)+
ggplotly(endeudamiento_al)
endeudamiento <- filtro_emp_df %>%
filter(company != "Resto") %>%
ggplot(aes(year, liabilities, color = company))+
geom_point(alpha = 1, size = 3)+
geom_line()+
labs(title = "Liabilities", y = "USD million")+
theme(legend.position="bottom")
# scale_y_continuous(labels = scales::percent)+
endeudamiento
# writexl::write_xlsx(list(nocs_data = nocs_df ,
# tg_nocs= tg_media), path = "/Archivos/repos/hidrocarburos/analisis/resultados/argentina/NOC's.xlsx")
#cluster
data_cluster <- nocs_full %>%
filter(year == 2011, units == "USD million" ) %>%
select(company, productionGroup, indicatorName, observation) %>%
mutate(id = 1:nrow(.)) %>%
spread(key = indicatorName, value=observation) %>%
replace(is.na(.), 0 )
# na.omit()
# data_cluster_z <- scale(data_cluster[4:ncol(data_cluster)])
library(cluster)
datos_para_cluster = data_cluster[4:ncol(data_cluster)]
cantidad_clusters=2
CL = kmeans(scale(datos_para_cluster),cantidad_clusters)
datos$kmeans = CL$cluster